require "rubygems"
require "time"
require "hpricot"
require "local_libs"
require "sqlite3"

module Fanfou
  
  class Parser
    TYPES = ["status", "favorite", "privatemsg_receive", "privatemsg_sent", "album"]
    
    def initialize(url)
      self.class.init_db
      @url = url
      @count = 0
    end
    
    def parse(type)
      raise WrongTypeError.new("#{type} is not a valid type.") unless TYPES.include?(type)
      @type = type
      load_files
      send("parse_" + type)
      puts "#{type} parsed, count: #{@count}"
    end
    
    def parse_all
      TYPES.each do |t|
        parse(t)
      end
    end
    
    def parse_safe
      ["status", "favorite", "album"].each do |t|
        parse(t)
      end
    end
    
    private
    
    def self.init_db
      file = "fanfou.sqlite3"
      File.unlink(file) if File.exist?(file)
      SQLite3::Database.new(file) do |db|
        db.execute( "create table statuses ( `id` integer not null primary key, `content` varchar(300), `time` datetime, `method` varchar(50), `reply` varchar(100) )" )
        db.execute( "create table favorites ( `id` integer not null primary key, `author` varchar(50) not null, `content` varchar(300), `time` datetime, `method` varchar(50) )" )
        db.execute( "create table privatemsg_receives ( `id` integer not null primary key, `from` varchar(50) not null, `content` varchar(300), `time` datetime)" )
        db.execute( "create table privatemsg_sents ( `id` integer not null primary key, `to` varchar(50) not null, `content` varchar(300), `time` datetime, `parent` varchar(300) )" )
        db.execute( "create table albums ( `id` integer not null primary key, `full` varchar(30) not null, `thumb` varchar(30) not null, `title`  varchar(300))" )
        db.close
      end
    end
    
    def load_files
      dir = Dir.new(File.expand_path(@url))
      unless @type == "album"
        @files = Dir[File.join(dir.path, @type + "_*.html")].sort do |a, b|
          # A dirty hack to sort file list naturally.
          a.scan(/\d+/).last.to_i <=> b.scan(/\d+/).last.to_i 
        end
      else
        @files = [File.join(dir.path, "album.html")]
      end
    rescue
      raise WrongPathError.new("You may input a wrong archive path.")
    end
    
    def clear_db
      # This is not needed any more as we recreate a new dbfile at object initialization.
      eval("MeVenjDB::#{camelcase(@type)}.delete_all")
    end
    
    def camelcase(name)
      name.split("_").map(&:capitalize).join("")
    end
    
    def parse_privatemsg_receive
      clear_db
      @count = 0
      
      @files.each do |file|
        fh = open(file)
        doc = Hpricot(fh)
        doc.search("#stream/ol/li").each do |li|
          msg = MeVenjDB::PrivatemsgReceive.new
          li.search("a") do |a|
            msg.from = a.inner_html
          end
          li.search("span") do |span|
            if span.attributes['class'] == 'content'
              msg.content = span.inner_html
            else
              msg.time = Time.parse(span.inner_html)
            end
          end
          msg.save
          @count += 1
        end
        fh.close
      end
    end
    
    def parse_privatemsg_sent
      clear_db
      @count = 0
      
      @files.each do |file|
        fh = open(file)
        doc = Hpricot(fh)
        doc.search("#stream/ol/li").each do |li|
          msg = MeVenjDB::PrivatemsgSent.new
          li.search("a") do |a|
            msg.to = a.inner_html
          end
          li.search("span") do |span|
            if span.attributes['class'] == 'content'
              msg.content = span.inner_html
            else
              msg.time = Time.parse(span.inner_html)
            end
          end
          li.search("p") do |p|
            msg.parent = p.inner_html
          end
          msg.save
          @count += 1
        end
        fh.close
      end
    end
    
    def parse_album
      clear_db
      @count = 0
      
      @files.each do |file|
        fh = open(file)
        doc = Hpricot(fh)
        doc.search("#album/table//tr/td").each do |td|
          photo = MeVenjDB::Album.new
          td.search("a") do |a|
            photo.full = a.attributes["href"]
            a.search("img") do |img|
              photo.thumb = img.attributes["src"]
              photo.title = img.attributes["title"]
            end
          end
          photo.save
          @count += 1
        end
        fh.close
      end
    end
    
    def parse_status
      clear_db
      @count = 0
      @files.each do |file|
        fh = open(file)
        doc = Hpricot(fh)
        doc.search("#stream/ol/li").each do |li|
          msg = MeVenjDB::Status.new
          li.search("span") do |span|
            if span.attributes['class'] == 'content'
              msg.content = span.inner_html.gsub(/\0/, " ") # A work around for null byte.
            else
              span.search('a.time') do |a|
                msg.time = Time.parse(a.attributes["stime"])
              end
              span.search('span.method') do |s|
                msg.method = s.inner_html
              end
              span.search('span.reply') do |s|
                s.search("a") do |a|
                  msg.reply = a.inner_html
                end
              end
            end
          end
          msg.save
          @count += 1
        end
        fh.close
      end
    end
    
    def parse_favorite
      clear_db
      @count = 0
      @files.each do |file|
        fh = open(file)
        doc = Hpricot(fh)
        doc.search("#stream/ol/li").each do |li|
          msg = MeVenjDB::Favorite.new
          li.search("a.author") do |a|
            msg.author = a.inner_html
          end
          li.search("span") do |span|
            if span.attributes['class'] == 'content'
              msg.content = span.inner_html
            else
              span.search('a') do |a|
                msg.time = Time.parse(a.attributes["stime"])
              end
              span.search('span') do |s|
                msg.method = s.inner_html
              end
            end
          end
          msg.save
          @count += 1
        end
        fh.close
      end
    end
    
  end
  
  # Raised by Parser when the archive path cannot be used.
  class WrongPathError < StandardError; end
  
  # Raised by Parser#parse when asked for a type that is not in Parser::TYPES.
  class WrongTypeError < StandardError; end
  
end